import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
# Add the directory passed as the first command-line argument to the module
# search path; this has to run before the `decision_company` import below.
# NOTE(review): presumably that directory is where `decision_company` lives —
# confirm. Raises IndexError when the script is started without an argument.
sys.path.append(sys.argv[1])

import pandas as pd
import pickle
from decision_company import read_csv_file, convert_to_datetime

# Read atp_tennis.csv from the directory given as the first CLI argument
atp_csv_path = os.path.join(sys.argv[1], 'atp_tennis.csv')
atp_tennis = read_csv_file(atp_csv_path)

# Replace the raw 'Date' column with its datetime-converted counterpart
date_as_datetime = convert_to_datetime(atp_tennis['Date'])
atp_tennis['Date'] = date_as_datetime

print(atp_tennis['Date'])
# Reference-result dump, intentionally left disabled:
# pickle.dump(atp_tennis['Date'],open("./ref_result/atp_tennis_Date.pkl","wb"))

import pandas as pd
import pickle
from decision_company import read_csv_file, get_max

# The dataset is already in memory as `atp_tennis`; nothing is reloaded here.

# Take the maximum of the 'Date' column as the most recent date on record
match_dates = atp_tennis['Date']
last_date = get_max(match_dates)

print(last_date)
# Reference-result dump, intentionally left disabled:
# pickle.dump(last_date,open("./ref_result/last_date.pkl","wb"))

import pandas as pd
import pickle
from decision_company import read_csv_file, create_date_offset, filter_by_date

# Works on the already-loaded `atp_tennis` frame and the `last_date` found earlier.

# Cutoff date: five years before the most recent date in the data
five_year_span = create_date_offset(years=5)
five_years_ago = last_date - five_year_span

# Restrict the matches to the window starting at the cutoff.
# NOTE(review): whether the cutoff day itself is included depends on
# decision_company.filter_by_date — confirm.
recent_matches = filter_by_date(atp_tennis, 'Date', five_years_ago)

print(recent_matches)
# Reference-result dump, intentionally left disabled:
# pickle.dump(recent_matches,open("./ref_result/recent_matches.pkl","wb"))

import pandas as pd
import pickle
from decision_company import read_csv_file, get_min_value, sort_by_values, get_first_n_rows, concatenate_objects, rename_columns, bind_dataframe

# Builds on `recent_matches`; no data is reloaded here.

# Player/rank pairs from the first side of every match
first_side_ranks = recent_matches[['Player_1', 'Rank_1']]

# Same pairs from the second side, relabelled so both sides share column names
second_side_ranks = rename_columns(
    recent_matches[['Player_2', 'Rank_2']],
    {'Player_2': 'Player_1', 'Rank_2': 'Rank_1'},
)

# Stack both sides into a single player/rank table
concatenated_data = concatenate_objects(first_side_ranks, second_side_ranks)

# Group by player, take the minimum per group, order by 'Rank_1' and keep
# the first ten rows
grouped_data = bind_dataframe(concatenated_data, 'Player_1')
min_values = get_min_value(grouped_data)
sorted_data = sort_by_values(min_values, 'Rank_1')
top_10_ranked_players = get_first_n_rows(sorted_data, 10)

print(top_10_ranked_players)
# Reference-result dump, intentionally left disabled:
# pickle.dump(top_10_ranked_players,open("./ref_result/top_10_ranked_players.pkl","wb"))

import pandas as pd
import pickle
from decision_company import read_csv_file, to_list, reset_index

# Builds on `top_10_ranked_players`; no data is reloaded here.

# Reset the index with drop=False, then convert the result to a Python list
top_10_with_index_reset = reset_index(top_10_ranked_players, drop=False)
top_10_ranked_players_list = to_list(top_10_with_index_reset)

print(top_10_ranked_players_list)
# Reference-result dump, intentionally left disabled:
# pickle.dump(top_10_ranked_players_list,open("./ref_result/top_10_ranked_players_list.pkl","wb"))



import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file


# Take the first element of every entry as the player name and strip
# surrounding whitespace from it
top_10_player_names = [player[0].strip() for player in top_10_ranked_players_list]

print(top_10_player_names)

# Persist the names as the reference result. The file is opened in a `with`
# block so the handle is flushed and closed even if pickling fails, instead
# of being left for the garbage collector.
with open("./ref_result/top_10_player_names.pkl", "wb") as names_file:
    pickle.dump(top_10_player_names, names_file)